load("C:/Users/marthinetex/Downloads/ggplot.Rdata")
# Tidy data
library(tidyverse)
library(lubridate)
# Supplementary analysis
library(survival)
library(TraMineR)
library(rstatix)
# plotly
library(plotly)
# ggplot2 and addins
library(ggplot2)
library(ggfortify)
library(ggpubr)
library(gridExtra)
library(ggseqplot)
library(ggh4x)
# Other graphics tools
library(treemap)
library(notly)
# Initialize the plot with the dataset (DATA_ex2_1)
ggplot(DATA_ex2_1) +
# Add a stacked bar chart (geom_col creates bars with heights determined by 'N')
geom_col(
mapping = aes(N, Symptome, fill = Time), # Map 'N' to x, 'Symptome' to y, and 'Temps' to fill color
width = 0.6, # Set the bar width
position = "stack" # Stack bars by the 'fill' variable
) +
# Create facets (separate panels) for each level of the 'Episode' variable
facet_grid(cols = vars(Episode)) +
# Add a vertical line at x = 0 for reference
geom_vline(aes(xintercept = 0)) +
# Customize the x-axis scale
scale_x_continuous(
limits = c(0, 6250), # Set the range for the x-axis
breaks = seq(0, 6000, 1000), # Major tick marks every 1000 units
minor_breaks = seq(500, 5500, 500), # Minor tick marks every 500 units
expand = c(0, 0) # Remove padding on the x-axis
) +
# Add text labels to display 'Ntot' values next to each bar
geom_text(
mapping = aes(Ntot + 25, y = Symptome, label = Ntot), # Position and content of the text
hjust = 0, # Align text to the left of its position
nudge_x = 1 # Slightly nudge text horizontally
) +
# Add titles and subtitles (empty here for now)
labs(title = "", subtitle = "") +
# Customize the overall appearance of the plot
theme(
axis.title = element_blank(), # Remove axis titles
panel.background = element_rect(fill = "white"), # Set panel background to white
panel.grid.major.x = element_line(color = "darkgray"), # Major grid lines for x-axis
panel.grid.minor.x = element_line(colour = "lightgray") # Minor grid lines for x-axis
)
# Initialize the plot with the dataset (DATA_ex2_2)
ggplot(DATA_ex2_2) +
# Add a stacked bar chart
geom_col(
mapping = aes(P, COVID_long_3mod, fill = Q_E1), # Map 'P' to x, 'COVID_long_3mod' to y, and 'Q_E1' to fill
width = 0.6, # Set the bar width
position = "stack", # Stack bars by the 'fill' variable
color = "white" # Add white border to bars
) +
# Create facets (separate rows) for each level of the 'Q_A2' variable
facet_grid(rows = vars(Q_A2)) +
# Add vertical lines at x = 0 and x = 100 for reference
geom_vline(aes(xintercept = 0)) +
geom_vline(aes(xintercept = 100)) +
# Customize the x-axis scale
scale_x_continuous(
limits = c(0, 100.2), # Set the range for the x-axis
breaks = seq(0, 100, 10), # Major tick marks every 10%
minor_breaks = seq(5, 95, 10), # Minor tick marks every 5%
expand = c(0, 0), # Remove padding on the x-axis
labels = paste0(seq(0, 100, 10), "%") # Append "%" to the tick labels
) +
# Add titles and subtitles (empty here for now)
labs(title = " ", subtitle = "") +
# Customize the overall appearance of the plot
theme(
axis.title = element_blank(), # Remove axis titles
# axis.text.x = element_blank(), # Uncomment to hide x-axis labels
# axis.text.y = element_blank(), # Uncomment to hide y-axis labels
panel.background = element_rect(fill = "white"), # Set panel background to white
panel.grid.major.x = element_line(color = "darkgray"), # Major grid lines for x-axis
panel.grid.minor.x = element_line(colour = "lightgray") # Minor grid lines for x-axis
) +
# Customize the fill legend for the bar chart
scale_fill_manual(
"Social interactions", # Title for the legend
values = rev(RColorBrewer::brewer.pal(4, "RdYlGn")), # Reverse color palette
labels = c( # Define labels for legend items
"Very satisfied", # Very satisfactory
"Satisfied", # Somewhat satisfactory
"Unatisfied", # Somewhat unsatisfactory
"Very unsatisfied " # Very unsatisfactory
)
)
ymax <- max(table(DATA_ex1_1$cas_date))
coeff <- (140000 / ymax) * (ymax / 80)
DATA_ex1_2$Freq <- DATA_ex1_2$Freq / coeff
# Initialize the plot with a dataset (DATA_ex1_1)
ggplot(DATA_ex1_1) +
# Add a histogram with dodged bars, colored by the "Incident" variable
geom_histogram(
mapping = aes(x = cas_date, fill = Incident), # Map 'cas_date' to x-axis, and 'Incident' to fill
breaks = monthly_breaks, # Use specified monthly breaks
closed = "left", # Define intervals as left-closed
color = "white", # Set white border for bars
position = "dodge" # Dodge bars side by side
) +
# Add horizontal grid lines with white color
geom_hline(yintercept = 1:ymax, color = "white") +
# Add vertical reference lines for specific dates
geom_vline(
xintercept = as_datetime(c(
"2021-01-01 00:00:00", "2022-01-01 00:00:00"
), tz = "CET"), # Define dates as datetime objects
color = "lavenderblush4", # Set line color
size = 1 # Set line thickness
) +
# Annotate rectangular periods of interest
annotate(
"rect", # Specify rectangle annotation
xmin = as_datetime("2020-04-01 00:00:00", tz = "CET"), # Start of the rectangle
xmax = as_datetime("2020-06-01 00:00:00", tz = "CET"), # End of the rectangle
ymin = 0, # Rectangle starts at y = 0
ymax = ymax, # Rectangle ends at y = ymax
fill = "gray", # Set fill color to gray
alpha = 0.25 # Set transparency
) +
# Repeat annotation for additional periods
annotate("rect", xmin = as_datetime("2020-11-01 00:00:00", tz = "CET"),
xmax = as_datetime("2021-01-01 00:00:00", tz = "CET"), ymin = 0, ymax = ymax, fill = "gray", alpha = 0.25) +
annotate("rect", xmin = as_datetime("2021-04-01 00:00:00", tz = "CET"),
xmax = as_datetime("2021-06-01 00:00:00", tz = "CET"), ymin = 0, ymax = ymax, fill = "gray", alpha = 0.25) +
# Add a line plot for data in DATA_ex1_2
geom_line(
data = DATA_ex1_2,
aes(
x = as_datetime(paste(Var1, "00:00:00"), tz = "CET"), # Convert Var1 to datetime
y = Freq, # Use Freq for y-axis
colour = "Number of vaccine doses" # Set color legend
),
linetype = 5 # Set line type
) +
# Add points on the line plot
geom_point(
data = DATA_ex1_2,
aes(x = as_datetime(paste(Var1, "00:00:00"), tz = "CET"), y = Freq), # Map Var1 and Freq
colour = col_sec, # Set color
shape = 16, # Use solid circles
size = 2 # Set point size
) +
# Format x-axis as datetime
scale_x_datetime(
expand = c(0, 0), # Remove padding
date_breaks = "month", # Major breaks every month
date_minor_breaks = "month", # Minor breaks also monthly
date_labels = "%m/%y" # Format as MM/YY
) +
# Customize color scale for doses
scale_colour_manual("Doses", values = col_sec) +
# Use classic theme for the plot
theme_classic() +
# Modify various theme elements
theme(
panel.grid.minor.y = element_line(colour = "lightgray", size = 1), # Minor grid lines
panel.grid.major.y = element_line(colour = "lavenderblush4", size = 1), # Major grid lines
axis.text.x = element_text(angle = 90), # Rotate x-axis labels
axis.text.y.right = element_text(color = col_sec), # Color right y-axis text
axis.line.y.right = element_line(color = col_sec), # Color right y-axis line
axis.title.y.right = element_text(color = col_sec) # Color right y-axis title
) +
# Format y-axis with a secondary axis
scale_y_continuous(
expand = c(0, 0), # Remove padding
limits = c(0, ymax + 1), # Set y-axis limits
breaks = seq(0, 90, 10), # Major breaks every 10
sec.axis = sec_axis(
trans = ~ . * coeff, # Transform secondary axis
name = "Number of doses dispensed", # Secondary axis title
breaks = floor(seq(0, 140000, length.out = 9)), # Secondary axis breaks
labels = formatC( # Format secondary axis labels
floor(seq(0, 140000, length.out = 9)),
format = "f",
big.mark = " ",
digits = 0
)
)
) +
# Add titles and axis labels
labs(title = "", x = " ", y = "Number of incidents") +
# Add text annotations for specific periods
geom_text(mapping = aes_q(
x = as_datetime("2020-07-01 00:00:00", tz = "CET"), y = 82, label = "Period n°1"
)) +
geom_text(mapping = aes_q(
x = as_datetime("2021-07-01 00:00:00", tz = "CET"), y = 82, label = "Period n°2"
)) +
geom_text(mapping = aes_q(
x = as_datetime("2022-08-01 00:00:00", tz = "CET"), y = 82, label = "Period n°3"
))
# Simulate data for a boxplot
dataBoxplot <- data.frame(
id = rep(1:47, 5), # 47 subjects, repeated 5 times for each group
times = rep(paste0("n°", 1:5), each = 47), # 5 time points
val = c( # Simulate values for each time point with different means and SDs
rnorm(n = 47, mean = 2, sd = 1), # Group 1 (mean=2, sd=1)
rnorm(n = 47, mean = 5, sd = 1), # Group 2 (mean=5, sd=1)
rnorm(n = 47, mean = 5, sd = 2), # Group 3 (mean=5, sd=2)
rnorm(n = 47, mean = 8, sd = 2), # Group 4 (mean=8, sd=2)
rnorm(n = 47, mean = 8, sd = 5) # Group 5 (mean=8, sd=5)
)
)
# Perform ANOVA to test differences between the groups
res.aov <- anova_test(
data = dataBoxplot,
dv = val, # Dependent variable: val (size)
wid = id, # Within-subjects factor: id
between = times # Between-subjects factor: times
)
# Pairwise t-tests to compare each group with group 1, adjusting for multiple comparisons using Bonferroni correction
pwc <- dataBoxplot %>%
pairwise_t_test(
val ~ times, # Dependent variable 'val' across levels of 'times'
paired = TRUE, # Paired samples
ref.group = "n°1", # Use "n°1" as the reference group
p.adjust.method = "bonferroni" # Apply Bonferroni correction to p-values
) %>%
add_xy_position(x = "times") # Add xy-position for displaying p-values
# Create the boxplot with individual data points and p-values
ggboxplot(
dataBoxplot, # Data for the boxplot
x = "times", # Group by 'times'
y = "val", # Plot 'val' (size) on the y-axis
add = "point", # Add individual data points to the boxplot
ylab = "Size", # Label for the y-axis
xlab = "" # No label for the x-axis
) +
stat_pvalue_manual(pwc) + # Add p-values from pairwise t-tests
labs(
subtitle = get_test_label(res.aov, detailed = FALSE), # Subtitle for ANOVA result
caption = "2 by 2 testing: Student's t-test, p-value adjustment: Bonferroni" # Caption explaining test details
)
n <- 1000 # Number of individuals
df <- data.frame(
id = 1:n,
time = as.numeric(sample(1:365, n, replace = TRUE)), # Simulated follow-up time between 1 and 365 days
event = factor(sample(0:2, n, prob = c(0.4, 0.5, 0.1), replace = TRUE)), # Event (0 = censored, 1 = sick, 2 = dead)
exposure = sample(c("Unexposed", "Exposed"), n, replace = TRUE) # Exposure status (Unexposed or Exposed)
)
df$time[df$event == 0] <- 365 # Set time to 365 for censored observations (event == 0)
fit <- survfit(Surv(time = time, event = event) ~ exposure, data = df) # Fitting a Survival Model
# Plot a survival curve with `autoplot` and enhance it with additional layers
autoplot(fit, surv.linetype = 'blank', surv.colour = NA) +
# Add step lines representing survival curves
geom_step(
aes(linetype = strata, color = event), # Map line type to `strata` and color to `event`
size = 0.8 # Set line thickness
) +
# Customize the line types for survival curves
scale_linetype_manual(
name = "Group", # Title for the legend
values = c('dashed', 'solid'), # Define line types: dashed for unexposed, solid for exposed
labels = c('Unexposed', 'Exposed') # Labels for the legend
) +
# Customize the colors for the event states
scale_color_manual(
name = "State", # Title for the legend
values = c("#AAC0AF", "#1C4073", "#B28B84"), # Colors for Healthy, Sick, and Dead states
labels = c('Healthy', 'Sick', 'Dead') # Labels for the legend
) +
# Add axis labels and a title
labs(
x = "Followup period (days)", # X-axis label
y = "Proportion of the population", # Y-axis label
title = "" # Title (empty for now)
) +
# Add a vertical reference line at x = 0
geom_vline(
aes(xintercept = 0), # Position of the line
size = 1 # Thickness of the line
) +
# Customize the y-axis scale
scale_y_continuous(
limits = c(0, 1), # Set the y-axis range from 0 to 1
breaks = seq(0, 1, 0.1), # Major ticks every 0.1
labels = paste0(seq(0, 100, 10), "%"), # Convert tick labels to percentages
expand = c(0, 0) # Remove padding on the y-axis
) +
# Customize the x-axis scale
scale_x_continuous(
breaks = seq(0, 360, 60), # Major ticks every 60 days
expand = c(0, 0) # Remove padding on the x-axis
) +
# Apply a minimal theme for a clean appearance
theme_minimal() +
# Customize specific theme elements
theme(
plot.title = element_text(hjust = 0.5), # Center-align the plot title
axis.title.x = element_text(
face = "bold", # Make x-axis title bold
colour = "black", # Set title color to black
size = 12 # Set font size
),
axis.title.y = element_text(
face = "bold", # Make y-axis title bold
colour = "black", # Set title color to black
size = 12 # Set font size
),
legend.title = element_text(face = "bold", size = 10), # Style legend titles
panel.grid.major.y = element_line(
colour = "lightgray", # Light gray major grid lines on the y-axis
size = 0.3 # Set line thickness
),
panel.grid.minor.x = element_blank() # Remove minor grid lines on the x-axis
)
plotSankey <- function(Nodes, Links, add.prop = FALSE) {
# Function to create a Sankey diagram using plotly.
# Args:
# Nodes: Data frame containing node details (labels, colors, etc.).
# Links: Data frame containing link details (source, target, values, colors).
# add.prop: Logical. If TRUE, adds percentage proportions to node labels.
if (add.prop) {
# Check if all sources in Links have an equal number of occurrences
if (all(table(Links$source) == table(Links$source)[1])) {
nrow_per_times <- table(Links$source)[1] ^ 2 # Determine rows per iteration
n_times <- nrow(Links) / nrow_per_times # Calculate number of iterations
for (i in 1:n_times) {
# Extract subset of Links for current iteration
tab <- Links[((i - 1) * nrow_per_times + 1):((i - 1) * nrow_per_times + nrow_per_times), ]
# Calculate proportions for targets
vals <- as.numeric(by(tab$value, tab$target, sum)) # Sum values by target
Nodes[Nodes$X %in% (unique(tab$target) + 1), "label"] <- paste0(
Nodes[Nodes$X %in% (unique(tab$target) + 1), "label"],
" (",
formatC(vals / sum(vals) * 100, digits = 1, format = 'f'), # Format as percentage
"%)"
)
if (i == 1) {
# Calculate proportions for sources during the first iteration
vals <- as.numeric(by(tab$value, tab$source, sum)) # Sum values by source
Nodes[Nodes$X %in% (unique(tab$source) + 1), "label"] <- paste0(
Nodes[Nodes$X %in% (unique(tab$source) + 1), "label"],
" (",
formatC(vals / sum(vals) * 100, digits = 1, format = 'f'), # Format as percentage
"%)"
)
}
}
} else {
warning(
"Links arg does not have equal number of each source and function cannot automatically calculate proportions."
)
}
}
# Convert colors in Links to RGBA format with transparency
Links$color <- apply(grDevices::col2rgb(Links$color), 2, function(x) {
paste0("rgba(", x[1], ",", x[2], ",", x[3], ",0.4)") # Add 40% transparency
})
# Create the Sankey diagram using plotly
fig <- plot_ly(
type = "sankey", # Specify Sankey diagram
orientation = "h", # Horizontal orientation
alpha_stroke = 0.2, # Node border transparency
node = list(
label = Nodes$label, # Node labels
color = Nodes$color, # Node colors
pad = 15, # Padding between nodes
thickness = 20, # Node thickness
line = list(color = "black", width = 0.5) # Node border style
),
link = list(
source = Links$source, # Source nodes for links
target = Links$target, # Target nodes for links
value = Links$value, # Link values
color = Links$color # Link colors (RGBA)
)
)
# Customize the layout of the Sankey diagram
fig <- fig %>% layout(
font = list(
size = 14, # Font size for labels
color = "black", # Font color
weight = "bold" # Font weight
)
)
# Return the generated plot
fig
}
plotSankey(sankeyData$Nodes, sankeyData$Links, add.prop = TRUE)
# Create a dataset simulating treatment sequences
carpetData <- data.frame(
T1 = sample( # Initial treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.95, 0.025, 0.025), replace = TRUE
),
T2 = sample( # Second treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.75, 0.125, 0.125), replace = TRUE
),
T3 = sample( # Third treatment stage
c("Initial treatment", "Substitution", "No treatment"),
size = 1000, prob = c(0.5, 0.25, 0.25), replace = TRUE
)
) %>%
group_by(T1, T2, T3) %>% # Group data by unique sequences
summarise(w = n()) # Summarize the weight (frequency) of each sequence
# Define a sequence object using the TraMineR package
seqCarpet <- seqdef(
data = carpetData[, c("T1", "T2", "T3")], # Extract sequence columns
weights = carpetData$w, # Use weights for the sequences
cpal = c("#AAC0AF", "#B28B84", "#1C4073"), # Custom color palette for states
cnames = c("T1", "T2", "T3") # Custom names for the sequence stages
)
# Define substitution costs using a constant method
couts <- seqsubm(seqCarpet, method = "CONSTANT", cval = 2)
# Compute optimal matching distances
seq.om <- seqdist(seqCarpet,
method = "OM", # Optimal Matching (OM) method
indel = 1, # Insertion/deletion cost
sm = couts) # Substitution matrix
# Perform hierarchical clustering on the sequence distances
seq.dist <- hclust(as.dist(seq.om), method = "ward.D2")
# Create sequence clusters by cutting the dendrogram into 2 groups
seq_cl <- factor(cutree(seq.dist, 2), labels = c("Class n°1", "Class n°2"))
# Plot individual sequence plots and group sequence plots side by side
ggarrange(
ggseqiplot(seqCarpet, facet_ncol = 1, no.n = TRUE) + # Individual plot
theme(
panel.spacing = unit(0.1, "lines"), # Adjust panel spacing
axis.text.y = element_text(colour = "white"), # Hide y-axis text
axis.ticks.y = element_line(colour = "white") # Hide y-axis ticks
) +
labs(y = ""), # Remove y-axis label
ggseqiplot( # Plot grouped sequences
seqCarpet,
group = seq_cl, # Group by clusters
facet_ncol = 1,
no.n = TRUE
) +
force_panelsizes(rows = table(seq_cl)) + # Adjust panel sizes by group
theme(
panel.spacing = unit(0.1, "lines"), # Adjust panel spacing
axis.text.y = element_text(colour = "white"), # Hide y-axis text
axis.ticks.y = element_line(colour = "white") # Hide y-axis ticks
) +
labs(y = ""), # Remove y-axis label
ncol = 2, # Arrange plots in 2 columns
nrow = 1, # Arrange plots in 1 row
common.legend = TRUE, # Use a common legend
legend = "bottom" # Place legend at the bottom
)
dataTreemap <- data.frame(
cause_of_death = c(
"Cancer",
rep("Non cancerous diseases", 6),
rep("Road accidents", 2),
rep("Other or unknown causes", 4),
"Suicide"
),
details = c(
NA,
"Cardiac",
"Respiratory",
"Mental disorders",
"Digestive",
"Endocrinian",
"Other",
"Car",
"Other",
"Weapons",
"Poorly defined",
"Undefined",
"Other",
NA
),
n = c(62, 19, 2, 1, 3, 2, 8, 7, 26, 5, 14, 8, 8, 52)
)
dataTreemap <- as.data.frame(t(apply(dataTreemap, 1, function(x) {
x[1] <- paste0(x[1], " (n=", sum(dataTreemap$n[dataTreemap$cause_of_death == x[1]]), ")")
x
})))
dataTreemap$n <- as.numeric(dataTreemap$n)
treemap(
dataTreemap,
index = c("cause_of_death", "details"),
vSize = "n",
type = "index",
algorithm = "pivotSize",
title = "",
align.labels = list(
c("left", "top"), c("left", "bottom")),
palette = "Pastel1",
fontsize.title = 22,
fontcolor.labels = "black",
bg.labels = 0,
aspRatio = 1
)